import json
import time
import requests
import re

# Shared HTTP session so TCP connections (and any cookies the site sets)
# are reused across the paginated requests below.
session = requests.Session()
# Maoyan TOP100 board; pages are selected via an `offset` query parameter.
url = "https://www.maoyan.com/board/4"

def get_page(url):
    """Scrape all 10 pages of the Maoyan TOP100 board and append each page's
    movies to maoyanTOP100.txt via write_to_file.

    url: board base URL; pagination uses an `offset` query parameter
         (10 movies per page, offsets 0..90).
    Returns None. On a request failure the error is printed and the
    remaining pages are skipped, matching the original early-return.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'Cookie': '__mta=210556842.1670398312063.1670571944527.1670571948545.52; uuid_n_v=v1; uuid=375781C0760111EDA0C51946765EC455664861E57863410A8EA0B89BC4F05D0E; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_cuid=184eb7f7d6cc8-05804e3eeb3e11-17525635-13c680-184eb7f7d6cc8; _lxsdk=375781C0760111EDA0C51946765EC455664861E57863410A8EA0B89BC4F05D0E; __mta=210556842.1670398312063.1670399286168.1670399292593.22; _csrf=4ed88064ed229263ae1cf888429ddbbcd9ad827820388930f4c03f79ad80b1b1; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1670398312,1670571901; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1670571948; _lxsdk_s=184f5d84075-07e-efc-616%7C%7C8',
        'Host': 'www.maoyan.com'
    }
    # Captures per movie: (img data-src, alt title, "star" <p>, "releasetime" <p>).
    # Compiled once instead of on every page (loop-invariant).
    reg = re.compile(
        '<img.*?data-src="(.*?)".*?alt="(.*?)".*?board-img.*?>.*?'
        '<p.*?class="star">(.*?)</p>.*?<p.*?class="releasetime">(.*?)</p>',
        re.S)
    for i in range(10):
        try:
            res = session.get(url + '?offset=' + str(i * 10), headers=headers)
            # HTTPError is only ever raised by raise_for_status(); without
            # this call the original `except requests.HTTPError` was dead
            # code and bad status pages were parsed silently.
            res.raise_for_status()
        except requests.RequestException as e:
            # RequestException is the base of everything `get` raises
            # (ConnectionError, Timeout, HTTPError, ...).
            print(e)
            return None
        # Renamed from `list` — the original shadowed the builtin.
        movies = [
            {
                'img': img,
                'name': name,
                'protagonist': star,
                'releaseTime': release_time,
            }
            for img, name, star, release_time in reg.findall(res.text)
        ]
        write_to_file(movies)

def write_to_file(content):
    """Append `content` as a single JSON line to maoyanTOP100.txt.

    content: a JSON-serializable object (here, a list of movie dicts
             for one scraped page).
    The 1-second sleep throttles the scraper between pages; it is kept
    here because this function is called once per page fetched.
    """
    with open('maoyanTOP100.txt', 'a', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese titles human-readable
        # in the output file. (Removed a leftover debug print that
        # serialized the payload a second time just to show its type.)
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
    # Sleep after the file is closed, not while holding it open.
    time.sleep(1)

get_page(url)